#delimit ;
clear;
set more 1;

* Program calculates trade moments one country at a time;

***** READ data for China, Indonesia, Chile, and Colombia **************************************************************************;
* Comments need to be removed in this section to read the data for each country (and comments need to be inserted for the other countries **** ;

/*
** read chinese data *************;
use chinanewfinal,clear;
rename export EXP;
rename year Year;
rename newsitc PROD5D;
rename id PSID;
destring PROD5D,replace;
keep Year PSID PROD5D EXP ytotal;
save chinatemp,replace;

use industry2003,clear;
sort sitc;
save,replace;

use qy06,clear;
gen Year = 2006;
rename frdm PSID;
rename b07 sitc;
rename v209 ytotal;
rename v213 EXP;
keep PSID Year sitc EXP ytotal;
save china06,replace;

use qy2007,clear;
gen Year = 2007;
rename frdm PSID;
rename b07 sitc;
rename v209 ytotal;
rename v213 EXP;
keep Year PSID sitc EXP ytotal;
append using china06;
destring sitc,replace;
sort sitc;
merge sitc using industry2003;
keep if _merge == 3;
rename newsitc PROD5D;
drop sitc;
drop _merge;
append using chinatemp;

erase chinatemp.dta;
erase china06.dta;

replace EXP = 0 if EXP < 0;

collapse (sum) EXP ytotal (median) PROD5D, by(Year PSID);
gen dom = ytotal - EXP;
replace dom = ytotal if EXP == .;
replace dom = 0 if dom < 0;
save temp,replace;

** end read chinese data ******************************;
*/

/*
** read Indonesia data ********************************;
use Indonesia_allfirms,clear;
gen ytotal = DOM + EXP;
rename YEAR Year;
rename DOM dom;
save temp,replace;
** end read Indonesia data *****************************;
*/

/*
** read chile ******************************************;
use ENIA1995to2007_allfirms,clear;
 rename nui PSID;
  rename anio Year;
  rename ciiu3 PROD5D;
  rename fabval ytotal;
  rename expval EXP;
  gen dom = ytotal - EXP;
save temp,replace;
** end read chile data **************************************;
*/


** read colombia ********************************************;
use colombia_1981-1991;
  rename Plant_ID PSID;
  rename Industry_code PROD5D;
  rename Exports EXP;
  rename Total_Sales ytotal;
  gen dom = ytotal - EXP;
save temp,replace;
** end read colombia ****************************************;
**** END data extraction *****************************************************************************;

** start data processing for all countries ***********************************************************;
** This part is the same for all countries ***********************************************************;
** User may want to change the name of the output file -- it is currently indonesia.log **************;

** temp is the master dataset with all the observations for exporters and non-exporters;

** growth rate of total sales *********************************************************;
use temp,clear;
  collapse (sum) expsum=ytotal, by(Year);
  sort Year;
save totalexp,replace;

use totalexp,clear;
  replace Year = Year - 5;
  sort Year;
  rename expsum expsum_lead;
  keep Year expsum_lead;
save totalexp_lead,replace;  * total exports five years ahead;  

use totalexp,clear;
  merge Year using totalexp_lead;
  gen dexptotal = expsum_lead/expsum;
  drop if dexptotal == .;
  drop if dexptotal == 0;
  sort Year;
  keep Year dexptotal;
  * dexptotal is ratio of sales in t+5/t;
save totalexp,replace;  * dataset with total sales at t and change in sales between t and t+5;

erase totalexp_lead.dta;
** end of growth rate of total sales **********************************************************;

** create temp_lead *********************************************************************;
use temp,clear;
  replace Year = Year - 5;
  rename EXP exp_lead;
  rename dom dom_lead;
  rename ytotal ytotal_lead;
  sort Year;
  merge Year using totalexp;  * merge data with change in total sales;
  replace exp_lead = exp_lead/dexptotal;
  replace dom_lead = dom_lead/dexptotal;
  replace ytotal_lead = ytotal_lead/dexptotal;
  keep Year PROD5D PSID exp_lead dom_lead ytotal_lead;  
  * firm level data five years ahead with all firms;
  * expsum is total domestic sales in t;
  * normalized by total domestic sales ;
  sort Year PSID;
save temp_lead,replace;

erase totalexp.dta;

** temp and temp_lead are the key datasets with firm level data ********************;

** growth rate of domestic sales vs. exports **************************************;
use temp;
  drop if EXP == .;
  drop if EXP == 0;
  sort Year PSID;
  merge Year PSID using temp_lead;
  drop if _merge == 2;  * drop entrants ;
  
  * sample are exporters at t;
  * new exporters at t+5 are excluded;
  
  gen ds = (ytotal_lead - ytotal)/(ytotal + ytotal_lead)*2;
  replace ds = -2 if ytotal_lead == .;
  gen dy = (dom_lead - dom)/(dom + dom_lead)*2;
  replace dy = - 2 if dom_lead == .;
  gen dx = (exp_lead - EXP)/(exp_lead + EXP)*2;
  replace dx = -2 if exp_lead == .;
  gen pos = (ds > 0);
  * pos=1 for exporters with positive change in total sales; 
  collapse (mean) dy dx, by(Year pos);
  log using indonesia.log,replace;
  ** sample are exporters in year t;
  ** growth rate of domestic and export sales by expanding vs. contracting firms;
  ** expanding or contracting based on total sales;
  ** domestic sales, exports, and total sales normalized such that growth rate of total sales is zero in each five year period;
  list Year pos dy dx;
  drop if dy >= 2;
  drop if dy <= -2;
  drop if dx >= 2;
  drop if dx <= -2;
  collapse (mean) dy dx (max) Yearmax=Year (min) Yearmin=Year, by(pos);
  list pos dy dx Yearmax Yearmin;
  log close;
** end ***********************************************************************************************;

** growth rate of total exports by year ************************************************************;
use temp,clear;
  collapse (sum) expsum=EXP, by(Year);
  sort Year;
save totalexp,replace;

use totalexp,clear;
  replace Year = Year - 5;
  sort Year;
  rename expsum  expsum_lead;
  keep Year expsum_lead;
save totalexp_lead,replace;  * total exports five years ahead;  

use totalexp,clear;
  merge Year using totalexp_lead;
  gen dexptotal = expsum_lead/expsum;
  drop if dexptotal == .;
  drop if dexptotal == 0;
  sort Year;
  keep Year dexptotal;
save totalexp,replace;  
* dataset with total exports at t and change in exports between t and t+5;
* dexptotal are total exports at t+5/t;

erase totalexp_lead.dta;

** end of growth rate of total exports by year ********************************************************;

* normalize exp_lead by total exports ******************************************;
use temp,clear;
  replace Year = Year - 5;
  rename EXP exp_lead;
  sort Year;
  merge Year using totalexp;
  replace exp_lead = exp_lead/dexptotal;
  keep Year PROD5D PSID exp_lead;  * firm level data five years ahead;
  sort Year;
save temp_lead,replace;
* exp_lead is normalized exports at the firm level;

/*
** check;
use temp;
  collapse (sum) EXP, by(Year);
  list;

use temp_lead;
  collapse (sum) exp_lead, by(Year);
  list;
*/
  
erase totalexp.dta;

** statistics at the industry level *****************************************************************;
use temp;
  collapse (sum) EXP, by(Year PROD5D);
  sort Year PROD5D;
  egen expsum = sum(EXP), by(Year);
save tempind,replace;

use temp_lead;
  collapse (sum) exp_lead, by(Year PROD5D);
  sort Year PROD5D;
save tempind_lead,replace;

use tempind,clear;
  merge Year PROD5D using tempind_lead;
  gen dexp = (exp_lead - EXP);  * change in exports for each firm;
  replace dexp = -EXP if exp_lead ==.;
  replace dexp = exp_lead if EXP ==.;
  gen pos = (dexp > 0);
save tempind,replace;  * tempind has change in exports by industry;

use tempind,clear;
  collapse (sum) dexp (mean) expsum, by(Year pos);
  replace dexp = dexp/expsum;
  drop if dexp == .;
  
  log using indonesia.log,append;
  ** Industry level ;
  ** growth rate of exports by industry as a share of total exports (across all industries);
  ** Normalized such that growth rate of total exports is zero in each five year period;
  list Year pos dexp;
  drop if dexp >= .98;
  drop if dexp <= -.98;
  collapse (mean) dexp (max) Yearmax=Year (min) Yearmin=Year, by(pos);
  list pos dexp Yearmax Yearmin;
  log close;
clear;

erase tempind_lead.dta;

** end of industry level ********************************************************************;

** now to firm level ************************************************************************;

use tempind,clear;
  rename pos posind;
  keep Year PROD5D expsum posind;
  sort Year PROD5D;
save tempind,replace;  

use temp,clear;
  drop if EXP <= 0;
  drop if EXP == .;
  sort Year PROD5D PSID;
save temp,replace;

use temp_lead,clear;
  drop if exp_lead <= 0;
  drop if exp_lead == .;
  sort Year PROD5D PSID;
  keep Year PROD5D PSID exp_lead;
save temp_lead,replace;

use temp,clear;
  merge Year PROD5D PSID using temp_lead;  * need to match by industry and firmid;
  gen dexp = (exp_lead - EXP);  * incumbents;
  replace dexp = -EXP if exp_lead ==.; *exiters;
  replace dexp = exp_lead if EXP ==.;  * entrants;
  
  gen pos = (dexp > 0);  * at firm level ;

  sort Year PROD5D;
  drop _merge;
  merge Year PROD5D using tempind;
    
  collapse (sum) dexp (mean) expsum posind, by(Year PROD5D pos);
  
  sort Year PROD5D;
  replace dexp = dexp/expsum;

  drop if pos == .;
  drop if posind == .;
  collapse (sum) dexp, by(Year posind pos);
  save bs,replace;
  
  log using Indonesia.log,append;
  drop if dexp == .;
  ** firm level, average across industries;
  ** posind=1 are expanding industries;
  list Year posind pos dexp;
  drop if dexp >= .98;
  drop if dexp <= -.98;
  collapse (mean) dexp (max) Yearmax=Year (min) Yearmin=Year, by(posind pos);
  list posind pos dexp Yearmax Yearmin;
  log close;

** now extensive margin share of creation and destruction ***;

use temp,clear;
  merge Year PROD5D PSID using temp_lead;  * need to match by industry and firmid;
  gen dexp = (exp_lead - EXP);  * incumbents;
  gen entry = 0;
  gen exit = 0;
  replace dexp = -EXP if exp_lead ==.; *exiters;
  replace exit = 1 if exp_lead == .;
  replace dexp = exp_lead if EXP ==.;  * entrants;
  replace entry = 1 if EXP == .;
  gen pos = (dexp > 0);  * at firm level ;

  
  sort Year PROD5D;
  drop _merge;
  merge Year PROD5D using tempind;
  
  drop if posind == 1 & exit == 1;
  drop if posind == 1 & entry == 0;
  drop if posind == 0 & entry == 1;
  drop if posind == 0 & exit == 0;
  collapse (sum) dexp (mean) expsum posind, by(Year PROD5D pos);
  
  sort Year PROD5D;
  replace dexp = dexp/expsum;

  drop if pos == .;
  drop if posind == .;
  collapse (sum) dexp, by(Year posind pos);
  rename dexp dexp_extensive;
  sort Year posind pos;
  merge Year posind pos using bs;
  keep if _merge == 3;
  erase bs.dta;
  gen extensive = dexp_extensive/dexp;
  
  log using Indonesia.log,append;
  drop if extensive == .;
  drop if extensive == 1;
  ** share of extensive margin;
  ** posind=1 are expanding industries;
  list Year posind pos extensive;
  collapse (mean) extensive (max) Yearmax=Year (min) Yearmin=Year, by(posind pos);
  list posind pos extensive Yearmax Yearmin;
  log close;
clear;
  
erase temp.dta;
erase temp_lead.dta;
erase tempind.dta;


  


